# ---------------------------------------------------------------------------------------------------
# Automatic Generate Grid Input File
# Author: Meijian Yang, 06-14-2023
# NASA GISS, Columbia University
# Calls Soil_translator.R, Grid_cell_aggregation.R
# Needs SIMPLE-All_Africa_Countries_List.xlsx, Cultivar.csv, CROPGRIDSv1.05_NC_maps/*
#       Ghana_Crop_Area/GHA_Crops_HarvestArea.xlsx, Soil_nc4/*
# Generates Grid_input_CountryNames_period_scenario_ClimateModel.csv
# Intermediate files: SIMPLE_CropName_crop_area.csv, Soil_csv/*, Grid_input_CountryNames_soil.csv
# Updates: Meijian Yang, 07-16-2023
# Africa soil file: Grid_input_Africa_soil.csv; Don't need Soil_translator.R, Soil_nc4/* 
# ---------------------------------------------------------------------------------------------------

AutoGenGridInFile <- function(workDir, WeatherDir, MapExtention, species, cultivar) {
  library(readxl)
  
# Fixed parameters
#  dua	<- 90
  Water	<- "yes"
  IrrigationTrt <- 0
  MaxIntercept <- 0.95
  InitialBio <- 1
  InitialTT <- 0
  InitialFsolar <- 0.001
  Yield_obs	<- 0
  Label <- "Gridcell"
  
  inPath <- paste0(workDir,"Input/")

  split_WeatherDir <- strsplit(WeatherDir, split = "/")
  if (split_WeatherDir[[1]][3] == "future"){
    GridInFileName <- paste0(paste("Grid_input", species, paste(MapExtention, collapse = "_"), split_WeatherDir[[1]][3], split_WeatherDir[[1]][4], split_WeatherDir[[1]][5], sep = "_"), ".csv")
  }
  else if (split_WeatherDir[[1]][3] == "historical"){
    GridInFileName <- paste0(paste("Grid_input", species, paste(MapExtention, collapse = "_"), split_WeatherDir[[1]][3], split_WeatherDir[[1]][4], sep = "_"), ".csv")
  }
  else if (split_WeatherDir[[1]][3] == "baseline"){
    GridInFileName <- paste0(paste("Grid_input", species, paste(MapExtention, collapse = "_"), split_WeatherDir[[1]][3], sep = "_"), ".csv")
  }
  else{stop("AutoGenGridInFile.R: Wrong Weather Path Name!")}
  col_types <- c("numeric", "numeric", "numeric", "numeric", "numeric", "numeric", "text", "text", "numeric", "numeric")
  SIMPLE_grid_data <- read_excel(paste0(inPath,"Map/SIMPLE-All_Africa_Countries_List.xlsx"), col_types = col_types)
  rownames(SIMPLE_grid_data) <- NULL
  CultivarTable <- read.csv(paste0(inPath,"Cultivar.csv"))
  
  if (any(MapExtention == "Africa")) {country_data <- SIMPLE_grid_data}
  else {country_data <- subset(SIMPLE_grid_data, Country %in% MapExtention)}

  # determine CO2 concentration and the 5th digit of Weather file based on period and scenario, future: 2050, baseline: 2010, historical: 2010
  D7 = "X"
  if (split_WeatherDir[[1]][3] == "future"){
    if (split_WeatherDir[[1]][4] == "SSP126"){CO2 <- 469; D5 <- "7"}
    else if (split_WeatherDir[[1]][4] == "SSP370"){CO2 <- 541; D5 <- "8"}
    else if (split_WeatherDir[[1]][4] == "SSP585"){CO2 <- 563}
    else {stop("AutoGenGridInFile.R: Unknown Scenario! Check Weather Path!")}
  }
  else if (split_WeatherDir[[1]][3] == "historical"){CO2 <- 389; D5 <- "8"}
  else if (split_WeatherDir[[1]][3] == "baseline"){CO2 <- 389; D5 <- "0"}
  else{stop("AutoGenGridInFile.R: Unknown Time Period! Check Weather Path!")}
  
  # determine the 6th and 8th digit of Weather file based on Climate Model
  if (split_WeatherDir[[1]][3] == "future"){
    if (split_WeatherDir[[1]][5] == "GFDL-ESM4"){D6 <- "1"}
    else if (split_WeatherDir[[1]][5] == "IPSL-CM6A-LR"){D6 <- "2"}
    else if (split_WeatherDir[[1]][5] == "MPI-ESM1-2-HR"){D6 <- "3"}
    else if (split_WeatherDir[[1]][5] == "MRI-ESM2-0"){D6 <- "4"}
    else if (split_WeatherDir[[1]][5] == "UKESM1-0-LL"){D6 <- "5"}
    else {stop("AutoGenGridInFile.R: Unknown Climate Model! Check Weather Path!")}
    D8 <- "1"
  }
  else if (split_WeatherDir[[1]][3] == "historical"){
    if (split_WeatherDir[[1]][4] == "GFDL-ESM4"){D6 <- "1"}
    else if (split_WeatherDir[[1]][4] == "IPSL-CM6A-LR"){D6 <- "2"}
    else if (split_WeatherDir[[1]][4] == "MPI-ESM1-2-HR"){D6 <- "3"}
    else if (split_WeatherDir[[1]][4] == "MRI-ESM2-0"){D6 <- "4"}
    else if (split_WeatherDir[[1]][4] == "UKESM1-0-LL"){D6 <- "5"}
    else {stop("AutoGenGridInFile.R: Unknown Climate Model! Check Weather Path!")}
    D8 <- "1"
  }
  else if (split_WeatherDir[[1]][3] == "baseline"){D6 <- "P"; D8 <- "X"} # W5E5 (bias-adjusted ERA-5)
  else{stop("AutoGenGridInFile.R: Unknown Time Period! Check Weather Path!")}

  # filter the grids by harvest area (> 5 hectare)
  filtered_data <- data.frame()
  area_threshold <- 5
  if("Ghana" %in% MapExtention || MapExtention == "Africa") {
    country_data_Ghana <- subset(country_data, Country == "Ghana")
    # try using GHA_Crops_HarvestArea.xlsx for Ghana
    GhanaCropAreaTable <- paste0(inPath,"Ghana_Crop_Area/","GHA_Crops_HarvestArea.xlsx")
    if (file.exists(GhanaCropAreaTable)){
      tryCatch({
        source("Grid_cell_aggregation.R")
        Grid_cell_aggregation(species, inPath)
        filterTable <- paste0(inPath,"Ghana_Crop_Area/","SIMPLE_",species,"_crop_area.csv")
        filterTable_data <- read.table(filterTable,header=TRUE,sep=",",stringsAsFactors=FALSE)
        filterTable_data[is.na(filterTable_data)] <- 0
        filter_data <- subset(filterTable_data, Area_sum >= area_threshold, select = c(long, lat)) #filterTable_data[filterTable_data$Area_sum > 50,][,1:2]
        filtered_data <- merge(country_data, filter_data[,1:2],  by = c("long", "lat"), all.x = FALSE, sort=FALSE)
      }, error = function(error_message) {
        message(paste0("AutoGenGridInFile.R: Cannot find ", species, " in GHA_Crops_HarvestArea.xlsx for Ghana, trying CROPGRIDS ... "))
        filtered_data <<- filterByCROPGRIDS(inPath, species, country_data_Ghana) #must use "<<-" to perform alternate function within tryCatch
      })
    }
    # if missing GHA_Crops_HarvestArea.xlsx table
    else{
      filtered_data <- filterByCROPGRIDS(inPath, species, country_data_Ghana)
    }

    # for countries other than Ghana
    country_data_exGhana <- subset(country_data, Country != "Ghana")
    if(dim(country_data_exGhana)[1] > 0){
      filtered_data <- rbind(filtered_data, filterByCROPGRIDS(inPath, species, country_data_exGhana))
    }
  } 
  # Ghana is not included in the country list
  else{filtered_data <- filterByCROPGRIDS(inPath, species, country_data)}

  if (prod(dim(filtered_data)) == 0) {print(paste0("No grid is planted with ", species, " in this domain!")); return(filtered_data)}
  
  # get cultivar parameters
  CultivarPara <- data.frame()
  CultivarMatches <- grepl(species, CultivarTable[,1], ignore.case = TRUE)
  if (any(!is.na(CultivarMatches))){CultivarPara <- CultivarTable[CultivarMatches, 3:9][cultivar,]}

  setwd(workDir)
  # generate soil data file
  SoilPara <- data.frame()
  soilTable <- paste0(inPath,"Grid_input_Africa_soil.csv")
  soilData <- read.table(soilTable,header=TRUE,sep=",",stringsAsFactors=FALSE)
  SoilPara <- merge(filtered_data[,1:2], soilData, by = c("long", "lat"), all.x = TRUE, sort=FALSE)

  # set planting and harvest date
  filtered_data <- CropCalendar(inPath, species, filtered_data, CultivarPara$dua)
  # if no SowingDate and HarvestDate provided, use fixed date from the cultivar file
  filtered_data[is.na(filtered_data[,9]),9] <- CultivarPara$SowingDate
  filtered_data[is.na(filtered_data[,10]),10] <- CultivarPara$HarvestDate

  ## prepare the input file
  numTrt <- dim(filtered_data)[1]

  Trt_d <- 1:numTrt
  row_d <- filtered_data[,4]
  col_d <- filtered_data[,5]
  Exp_d <- filtered_data[,3]
  long_d <- filtered_data[,1]
  lat_d <- filtered_data[,2]
  sharepercent_d <- filtered_data[,6]
  country_d <- filtered_data[,8]
  Species_d <- rep(species, times = numTrt)
  SowingDate_d <- filtered_data[,9]
  HarvestDate_d <- filtered_data[,10]
  Tsum_d <- rep(CultivarPara$Tsum, times = numTrt)
  I50A_d <- rep(CultivarPara$I50A, times = numTrt)
  I50B_d <- rep(CultivarPara$I50B, times = numTrt)
  HI_d <- rep(CultivarPara$HI, times = numTrt)
  WeatherID_d <- sapply(filtered_data[,7], function(x) paste0(x, D5, D6, D7, D8))
  CO2_d <- rep(CO2, times = numTrt)
  SoilName_d <- 1:numTrt
  SoilPara_d <- SoilPara[,3:6]
  dua_d	<- rep(CultivarPara$dua, times = numTrt)
  Water_d	<- rep(Water, times = numTrt)
  IrrigationTrt_d <- rep(IrrigationTrt, times = numTrt)
  MaxIntercept_d <- rep(MaxIntercept, times = numTrt)
  InitialBio_d <- rep(InitialBio, times = numTrt)
  InitialTT_d <- rep(InitialTT, times = numTrt)
  InitialFsolar_d <- rep(InitialFsolar, times = numTrt)
  Yield_obs_d	<- rep(Yield_obs, times = numTrt)
  Label_d <- rep(Label, times = numTrt)
  
  SIMPLE_in <- cbind(Trt_d, row_d, col_d, Exp_d, long_d, lat_d, sharepercent_d, country_d, Species_d, SowingDate_d, HarvestDate_d, Tsum_d, 
                     I50A_d, I50B_d, HI_d, WeatherID_d, CO2_d, SoilName_d, SoilPara_d, dua_d, Water_d, IrrigationTrt_d, 
                     MaxIntercept_d, InitialBio_d, InitialTT_d, InitialFsolar_d, Yield_obs_d, Label_d)
  colnames(SIMPLE_in) <- c('Trt.',	'row',	'col',	'Exp.',	'long',	'lat',	'sharepercent',	'country', 'Species.',	'SowingDate',	'HarvestDate',	'Tsum',	'I50A',	'I50B',
                            'HI',	'Weather',	'CO2',	'SoilName',	'AWC',	'RCN',	'DDC',	'RZD',	'dua',	'Water',	'IrrigationTrt',	'MaxIntercept',	'InitialBio',
                            'InitialTT', 	'InitialFsolar', 	'Yield_obs',	'Label')
  
  #write.csv(SIMPLE_in, paste0(inPath, GridInFileName), row.names=FALSE)
  
  cat("Done with preparing grid input for:", GridInFileName,'\n')
  #return(paste0(inPath, GridInFileName))
  return(SIMPLE_in)
}


# filter grids by CROPGRIDS harvest area 
filterByCROPGRIDS <- function(inPath, species, country_data) {
  library(ncdf4)
  library(raster)
  
  filtered_data <- data.frame()
  area_threshold <- 5
  mask_raster <- raster(xmn=min(country_data$long)-0.25, xmx=max(country_data$long)+0.25,
                          ymn=min(country_data$lat)-0.25, ymx=max(country_data$lat)+0.25,
                          res= 0.5, crs="+proj=longlat +datum=WGS84")

  HarvestAreaFile <- paste0(inPath, "Map/CROPGRIDSv1.05_NC_maps/", "CROPGRIDSv1.05_", species, ".nc")
#  HarvestAreaFile <- paste0(inPath, "Map/CROPGRIDSv1.05_NC_maps/Ghana/", "CROPGRIDSv1.05_", species, ".nc") # for Ghana only

  if (file.exists(HarvestAreaFile)){
    nc_data <- nc_open(HarvestAreaFile)
    lon <- ncvar_get(nc_data, "lon")
    lat <- ncvar_get(nc_data, "lat")
    ha <- ncvar_get(nc_data, "harvarea")
    nc_close(nc_data)
    
    rownames(ha) <- lon
    colnames(ha) <- lat

    ha_df <- as.data.frame.table(ha)
    colnames(ha_df) <- c('lon','lat','ha')
    ha_df$lon <- as.numeric(as.character(ha_df$lon))
    ha_df$lat <- as.numeric(as.character(ha_df$lat))
    # convert Ocean = -2, Not assessed = -1 to 0
    ha_df$ha[ha_df$ha < 0] <- 0

    mask_ha <- rasterize(ha_df[,c('lon','lat')], mask_raster, ha_df[,'ha'], fun=sum, na.rm=T)
    filterTable_data <- data.frame(cbind(xyFromCell(mask_ha, 1:ncell(mask_ha)), round(values(mask_ha),2)))
    colnames(filterTable_data) <- c('long','lat','Area_sum')

    filter_data <- subset(filterTable_data, Area_sum >= area_threshold, select = c(long, lat))
    filtered_data <- merge(country_data, filter_data[,1:2],  by = c("long", "lat"), all.x = FALSE, sort=FALSE) 
  }
  else {print(paste0("AutoGenGridInFile.R: Can't find CROPGRIDS data for ",species,"!"))}
  
  return(filtered_data)
}

# get sowing and harvest date from GGCMI crop calendar
CropCalendar <- function(inPath, species, filtered_data, dua) {
  library(ncdf4)
  library(raster)

  crop_calendar_list <- list(africaneggplant = "afe", barley = "bar", bean = "bea", cassava = "cas", cotton = "cot", maize = "mai", millet = "mil", 
                             groundnut = "nut", pea = "pea", potato = "pot", rapeseed = "rap", rice1 = "ri1", rice2 = "ri2", 
                             rye = "rye", sugarbeet = "sgb", sugarcane = "sgc", sorghum = "sor", soybean = "soy", sunflower = "sun", 
                             springwheat = "swh", winterwheat = "wwh", fingermillet = "mil", cowpea = "soy", grasspea = "pea",
                             pigeonpea = "pea", lablab = "soy", bambaragroundnut = "nut", yams = "cas", cocoyam = "cas",
                             sweetpotato = "pot", mungbean = "pea", tomato = "tom", pearlmillet = "mil", fonio = "fon",
                             josephscoat = "jos", okra = "okr", sesame = "ses", taro = "tar", tef = "tef")
#  crop_calendar_list <- list(bambaragroundnut = "bam", cassava = "cas", cocoyam = "coc", cowpea = "cow", fonio = "fon",
#                             maize = "mai", sorghum = "sor", soybean = "soy", sweetpotato = "swe", yams = "yam")  # for Ghana only 

  CropCalendarFile <- paste0(inPath, "Map/crop_calendar/", crop_calendar_list[species], "_rf_ggcmi_crop_calendar_phase3_v1.01.nc4") # uncomment for continent run
#  CropCalendarFile <- paste0(inPath, "Map/crop_calendar/Ghana/", crop_calendar_list[species], "_rf_ggcmi_crop_calendar_phase3_v1.01.nc") # for Ghana only
  print(CropCalendarFile)  
  if (file.exists(CropCalendarFile)){
    nc_data <- nc_open(CropCalendarFile)
    lon <- ncvar_get(nc_data, "lon") # uncomment for continent run 
    lat <- ncvar_get(nc_data, "lat") # uncomment for continent run
#    lon <- ncvar_get(nc_data, "longitude") # for Ghana only
#    lat <- ncvar_get(nc_data, "latitude") # for Ghana only

    # planting date
    pd <- ncvar_get(nc_data, "planting_day")
    rownames(pd) <- lon; colnames(pd) <- lat
    pd_df <- as.data.frame.table(pd)
    colnames(pd_df) <- c('long','lat','pd')
    pd_df$long <- as.numeric(as.character(pd_df$long))
    pd_df$lat <- as.numeric(as.character(pd_df$lat))

    # harvest date
    tryCatch({
      md <- ncvar_get(nc_data, "maturity_day")
      rownames(md) <- lon; colnames(md) <- lat
      md_df <- as.data.frame.table(md)
      colnames(md_df) <- c('long','lat','md')
      md_df$long <- as.numeric(as.character(md_df$long))
      md_df$lat <- as.numeric(as.character(md_df$lat))
    }, error = function(error_message) {
      message(paste0("AutoGenGridInFile.R: Cannot find Maturity Date in Crop Calendar for ", species, '! Use fixed dua in Cultivar file.'))
      md_df <<- pd_df
      colnames(md_df) <<- c('long','lat','md')
      md_df$md <<- md_df$md + dua - 1
    })
    d_match <- merge(merge(filtered_data[,1:2], pd_df, by = c("long", "lat"), all.x = TRUE, sort=FALSE), md_df, by = c("long", "lat"), all.x = TRUE, sort=FALSE)
    if (any(!is.na(d_match[,3:4]))) {filtered_data[,9:10] <- d_match[,3:4]}
    nc_close(nc_data)
  }
  return(filtered_data)
}
